%
%The bibitems are sorted by publication date and author name.
%

% Focused-crawling paper presented at WWW8 (1999).
% Stored as a conference paper: the venue is a proceedings volume, so it goes
% in booktitle rather than journal. Names are joined with "and" so BibTeX
% parses three separate authors. fullauthor is not a standard field and is
% ignored by the standard styles; it records the unabbreviated names.
@inproceedings{CHAKRABARTI99,
  author     = {S. Chakrabarti and M. van den Berg and B. Dom},
  fullauthor = {Soumen Chakrabarti and Martin van den Berg and Byron Dom},
  title      = {Focused crawling: a new approach to topic-specific web resource discovery},
  booktitle  = {Proceedings of the 8th International World Wide Web Conference (WWW8)},
  year       = {1999},
}

% Online resource by Matthew Gray; the entry carries no year.
@misc{MAtthewGray-wanderer,
  author = {Matthew Gray},
  title  = {Internet Growth and Statistics: Credits and Background},
  url    = {http://www.mit.edu/people/mkgray/net/background.html},
}

% Learnable crawling paper, referenced through its CiteSeer page; no year recorded.
@misc{angkawattanawit-learnable,
  author = {Niran Angkawattanawit and Arnon Rungsawang},
  title  = {Learnable Crawling: An Efficient Approach to Topic-specific Web Resource Discovery},
  url    = {citeseer.ist.psu.edu/angkawattanawit02learnable.html},
}

% Survey on Web information retrieval and active logic, issued as a technical report.
% The standard misc type ignores a journal field, so the "Technical Report"
% label is carried in howpublished, which misc does print.
% NOTE(review): year 1999 disagrees with the "02" in the CiteSeer URL -- confirm.
@misc{barfourosh-information,
  author       = {A. Abdollahzadeh Barfourosh and H. R. Motahary Nezhad and M. L. Anderson and D. Perlis},
  title        = {Information Retrieval on the World Wide Web and Active Logic: A Survey and Problem Definition},
  howpublished = {Technical Report},
  year         = {1999},
  url          = {citeseer.ist.psu.edu/barfourosh02information.html},
}

% Focused crawling and tunneling for digital libraries; CiteSeer reference, no year recorded.
@misc{bergmark-focused,
  author = {Donna Bergmark and Carl Lagoze and Alex Sbityakov},
  title  = {Focused Crawls, Tunneling, and Digital Libraries},
  url    = {citeseer.ist.psu.edu/article/bergmark02focused.html},
}

% Ubicrawler distributed crawler. The text field holds the free-form citation
% string; it is not a standard field, so standard styles do not print it.
@misc{boldi02ubicrawler,
  author = {P. Boldi and B. Codenotti and M. Santini and S. Vigna},
  title  = {Ubicrawler: A scalable fully distributed web crawler},
  text   = {Paolo Boldi, Bruno Codenotti, Massimo Santini, and Sebastiano Vigna. Ubicrawler: A scalable fully distributed web crawler. In Proc. AusWeb02. The Eighth Australian World Wide Web Conference, 2002.},
  year   = {2002},
  url    = {citeseer.ist.psu.edu/boldi03ubicrawler.html},
}

% Web crawling model by Baeza-Yates and Castillo (2001); CiteSeer reference.
@misc{castillo01new,
  author = {Ricardo Baeza-Yates and Carlos Castillo},
  title  = {A New Model for Web Crawling},
  year   = {2001},
  url    = {citeseer.ist.psu.edu/castillo01new.html},
}

% SIGMOD-98 paper on hypertext categorization using hyperlinks.
@inproceedings{chakrabarti98enhanced,
  author    = {Soumen Chakrabarti and Byron E. Dom and Piotr Indyk},
  title     = {Enhanced hypertext categorization using hyperlinks},
  booktitle = {Proceedings of {SIGMOD}-98, {ACM} International Conference on Management of Data},
  editor    = {Laura M. Haas and Ashutosh Tiwary},
  publisher = {ACM Press, New York, US},
  address   = {Seattle, US},
  pages     = {307--318},
  year      = {1998},
  url       = {citeseer.ist.psu.edu/chakrabarti98enhanced.html},
}

% AAAI-99 Spring Symposium paper on building domain-specific search engines.
% The entry type is spelled inproceedings so that booktitle is honoured,
% names are joined with "and", and the trailing periods that belonged to the
% surrounding prose are removed from the author, title and URL.
@inproceedings{domain-specific-search-engines,
  author    = {A. McCallum and K. Nigam and J. Rennie and K. Seymore},
  title     = {Building domain-specific search engines with machine learning techniques},
  booktitle = {AAAI-99 Spring Symposium},
  year      = {1999},
  url       = {http://www.cs.cmu.edu/~mccallum/papers/cora-aaaiss99.ps.gz},
}

% ICML-99 paper on reinforcement learning for efficient Web spidering.
% NOTE(review): appears to describe the same paper as the rennie99using entry
% in this file -- consider citing only one of the two keys.
% The entry type is spelled inproceedings, trailing periods are dropped, and
% the stray space inside the URL is removed.
@inproceedings{reinforcement-learning-spider-web,
  author    = {J. Rennie and A. McCallum},
  title     = {Using reinforcement learning to spider the web efficiently},
  booktitle = {ICML-99},
  year      = {1999},
  url       = {http://www.cs.cmu.edu/~mccallum/papers/rlspider-icml99s.ps.gz},
}

% Brin and Page's search engine paper from the Seventh WWW Conference (1998).
% The entry type is spelled inproceedings, the page range uses "--", the month
% uses the predefined macro, and the trailing periods in the author and title
% are removed (the style supplies punctuation).
@inproceedings{google-crawler,
  author    = {Sergey Brin and Lawrence Page},
  title     = {The anatomy of a large-scale hypertextual Web search engine},
  booktitle = {Proceedings of the Seventh International World Wide Web Conference},
  pages     = {107--117},
  month     = apr,
  year      = {1998},
}

% VLDB Journal article on feature selection and classification for topic taxonomies.
@article{chakrabarti98scalable,
  author  = {Soumen Chakrabarti and Byron Dom and Rakesh Agrawal and Prabhakar Raghavan},
  title   = {Scalable Feature Selection, Classification and Signature Generation for Organizing Large Text Databases into Hierarchical Topic Taxonomies},
  journal = {VLDB Journal: Very Large Data Bases},
  volume  = {7},
  number  = {3},
  pages   = {163--178},
  year    = {1998},
  url     = {citeseer.ist.psu.edu/chakrabarti98scalable.html},
}

% Personalized and focused Web spiders; CiteSeer reference, no year recorded.
@misc{chau-personalized,
  author = {Michael Chau and Hsinchun Chen},
  title  = {Personalized and Focused Web Spiders},
  url    = {citeseer.ist.psu.edu/chau03personalized.html},
}
    
% Parallel crawlers paper from the 11th WWW conference (2002).
% The conference name is written "World Wide Web", as elsewhere in this file;
% the former "World--Wide" typeset an en-dash inside the name.
@inproceedings{cho02parallel,
  author    = {J. Cho and H. Garcia-Molina},
  title     = {Parallel crawlers},
  booktitle = {Proc. of the 11th International World Wide Web Conference},
  year      = {2002},
  url       = {citeseer.ist.psu.edu/cho02parallel.html},
}

% Journal article on crawl ordering of URLs.
@article{cho98efficient,
  author  = {Junghoo Cho and Hector Garc{\'\i}a-Molina and Lawrence Page},
  title   = {Efficient crawling through {URL} ordering},
  journal = {Computer Networks and ISDN Systems},
  volume  = {30},
  number  = {1--7},
  pages   = {161--172},
  year    = {1998},
  url     = {citeseer.ist.psu.edu/article/cho98efficient.html},
}

% VLDB 2000 paper on focused crawling with context graphs.
% The month is built from the predefined sep macro so that each style can
% expand or abbreviate it; the conference day range is concatenated in front.
@inproceedings{diligenti00focused,
  author    = {Michelangelo Diligenti and Frans Coetzee and Steve Lawrence and C. Lee Giles and Marco Gori},
  title     = {Focused Crawling using Context Graphs},
  booktitle = {26th International Conference on Very Large Databases, VLDB 2000},
  address   = {Cairo, Egypt},
  month     = "10--14~" # sep,
  year      = {2000},
  url       = {citeseer.ist.psu.edu/article/diligenti00focused.html},
}

% Ontology-focused crawling; CiteSeer reference, no year recorded.
@misc{ehrig-ontologyfocused,
  author = {Marc Ehrig and Alexander Maedche},
  title  = {Ontology-Focused Crawling of Web Documents},
  url    = {citeseer.ist.psu.edu/580795.html},
}

% Generic framework for focused Web crawling; CiteSeer reference, no year recorded.
@misc{ester-focused,
  author = {Martin Ester and Matthias Gross and Hans-Peter Kriegel},
  title  = {Focused Web Crawling: A Generic Framework for Specifying the User Interest and for Adaptive Crawling Strategies},
  url    = {citeseer.ist.psu.edu/456508.html},
}

% Conference paper on mobile crawlers (1999).
@inproceedings{fiedler99using,
  author    = {Jan Fiedler and Joachim Hammer},
  title     = {{U}sing the {W}eb {E}fficiently:  {M}obile {C}rawlers},
  booktitle = {Proceedings of the Seventeenth AoM/IAoM International Conference on Computer Science},
  address   = {San Diego, CA},
  pages     = {324--329},
  year      = {1999},
  url       = {citeseer.ist.psu.edu/article/fiedler99using.html},
}
  
% Workshop paper on exploration versus exploitation in topic-driven crawlers.
% An entry may carry only one url field: the duplicate is reported and
% dropped by BibTeX. The older nj.nec.com mirror is kept under the
% non-standard legacyurl field, which standard styles ignore.
@inproceedings{gp+ps+fm:explore_exploit,
  author    = {Gautam Pant and Padmini Srinivasan and Filippo Menczer},
  title     = {{E}xploration versus {E}xploitation in {T}opic {D}riven {C}rawlers},
  booktitle = {Proceedings of the Second International Workshop on Web Dynamics},
  address   = {Honolulu, Hawaii},
  month     = may,
  year      = {2002},
  url       = {citeseer.ist.psu.edu/pant02exploration.html},
  legacyurl = {citeseer.nj.nec.com/pant02exploration.html},
}

% Journal article describing the Mercator Web crawler.
% The year is 1999, in agreement with the citation key and the CiteSeer URL
% (the former value 1978 predates the Web); the page range uses "--".
@article{heydon99mercator,
  author  = {Allan Heydon and Marc Najork},
  title   = {Mercator: A Scalable, Extensible Web Crawler},
  journal = {World Wide Web},
  volume  = {2},
  number  = {4},
  pages   = {219--229},
  year    = {1999},
  url     = {citeseer.ist.psu.edu/heydon99mercator.html},
}

% Survey article on reinforcement learning (JAIR, 1996).
% The page range uses "--" so it is typeset as an en-dash.
@article{kaelbling96reinforcement,
  author  = {Leslie Pack Kaelbling and Michael L. Littman and Andrew P. Moore},
  title   = {Reinforcement Learning: A Survey},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {4},
  pages   = {237--285},
  year    = {1996},
  url     = {citeseer.ist.psu.edu/kaelbling96reinforcement.html},
}

% FOCS 2000 paper on stochastic models for the Web graph.
% Given names are supplied so that styles can print initials or full names;
% the entry previously listed surnames only.
% NOTE(review): given names added from the published paper -- confirm.
@inproceedings{kumar00stochastic,
  author    = {Ravi Kumar and Prabhakar Raghavan and Sridhar Rajagopalan and D. Sivakumar and Andrew Tomkins and Eli Upfal},
  title     = {Stochastic Models for the Web Graph},
  booktitle = {{FOCS}: {IEEE} Symposium on Foundations of Computer Science ({FOCS})},
  year      = {2000},
  url       = {citeseer.ist.psu.edu/501628.html},
}


% IEEE Computer article on digital libraries and autonomous citation indexing.
@article{lawrence99digital,
  author  = {Steve Lawrence and C. Lee Giles and Kurt Bollacker},
  title   = {Digital Libraries and Autonomous Citation Indexing},
  journal = {IEEE Computer},
  volume  = {32},
  number  = {6},
  pages   = {67--71},
  year    = {1999},
}

% ECML-98 paper on the naive Bayes independence assumption in information retrieval.
@inproceedings{lewis98naive,
  author    = {David D. Lewis},
  title     = {Naive ({B}ayes) at forty: The independence assumption in information retrieval.},
  booktitle = {Proceedings of {ECML}-98, 10th European Conference on Machine Learning},
  editor    = {Claire N{\'{e}}dellec and C{\'{e}}line Rouveirol},
  number    = {1398},
  publisher = {Springer Verlag, Heidelberg, DE},
  address   = {Chemnitz, DE},
  pages     = {4--15},
  year      = {1998},
  url       = {citeseer.ist.psu.edu/lewis98naive.html},
}
  
% WWW8 paper on measuring search engine quality with random walks.
% Names are joined with "and" (commas made BibTeX read the list as a single
% name) and a space follows each initial.
@inproceedings{measuringsearchengine,
  author    = {M. Henzinger and A. Heydon and M. Mitzenmacher and M. Najork},
  title     = {Measuring search engine quality using random walks on the web},
  booktitle = {Proceedings of the 8th International World Wide Web Conference},
  pages     = {213--225},
  year      = {1999},
}

% Conference paper on evaluating topic-driven Web crawlers (2001).
@inproceedings{menczer01evaluating,
  author    = {Filippo Menczer and Gautam Pant and Padmini Srinivasan and Miguel E. Ruiz},
  title     = {Evaluating Topic-Driven Web Crawlers},
  booktitle = {Research and Development in Information Retrieval},
  pages     = {241--249},
  year      = {2001},
  url       = {citeseer.ist.psu.edu/menczer01evaluating.html},
}

% Topic-driven crawlers and machine learning issues. The text field holds the
% free-form citation string; it is not a standard field, so standard styles
% do not print it.
@misc{menczer02topicdriven,
  author = {F. Menczer and G. Pant and P. Srinivasan},
  title  = {Topic-driven crawlers: Machine learning issues},
  text   = {F Menczer, G Pant, and P Srinivasan. Topic-driven crawlers: Machine learning issues. ACM TOIT, Submitted, 2002. http://dollar.biz.uiowa.edu/fil/Papers/TOIT.pdf.},
  year   = {2002},
  url    = {citeseer.ist.psu.edu/menczer02topicdriven.html},
}

% High-performance Web crawling reference (September 2001).
% The month uses the predefined sep macro so each style can expand or
% abbreviate it. The text field is non-standard and ignored by standard styles.
@misc{najork-highperformance,
  author = {M. Najork and A. Heydon},
  title  = {On High-Performance Web Crawling},
  text   = {Marc Najork and Allan Heydon},
  month  = sep,
  year   = {2001},
}

% Stanford technical report introducing the PageRank citation ranking.
% The URL host is citeseer.ist.psu.edu (the former "psu.exdu" was a typo), and
% PageRank is brace-protected so sentence-casing styles keep its capitals.
@techreport{page98pagerank,
  author      = {Lawrence Page and Sergey Brin and Rajeev Motwani and Terry Winograd},
  title       = {The {PageRank} Citation Ranking: Bringing Order to the Web},
  institution = {Stanford Digital Library Technologies Project},
  year        = {1998},
  url         = {citeseer.ist.psu.edu/page98pagerank.html},
}

% "Crawling the Web" by Pant, Srinivasan and Menczer; CiteSeer reference, no year recorded.
@misc{pant-crawling,
  author = {Gautam Pant and Padmini Srinivasan and Filippo Menczer},
  title  = {Crawling the Web},
  url    = {citeseer.ist.psu.edu/579280.html},
}

% SIGIR 2002 paper on target-seeking crawlers and their topical performance.
% An entry may carry only one url field: the duplicate is reported and
% dropped by BibTeX. The older nj.nec.com mirror is kept under the
% non-standard legacyurl field, which standard styles ignore.
% The month uses the predefined aug macro.
@inproceedings{ps+gp+fm:crawler_topical_performance,
  author    = {Padmini Srinivasan and Gautam Pant and Filippo Menczer},
  title     = {Target Seeking Crawlers and their Topical Performance},
  booktitle = {The 25th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  address   = {Tampere, Finland},
  month     = aug,
  year      = {2002},
  url       = {citeseer.ist.psu.edu/srinivasan02target.html},
  legacyurl = {citeseer.nj.nec.com/srinivasan02target.html},
}


% ICML-99 paper on reinforcement learning for efficient Web spidering.
% Bled is in Slovenia, whose two-letter country code is SI (the former "SL"
% denotes Sierra Leone). The second editor's name carries its carons, written
% as BibTeX special characters.
@inproceedings{rennie99using,
  author    = {Jason Rennie and Andrew Kachites McCallum},
  title     = {Using reinforcement learning to spider the {W}eb efficiently},
  booktitle = {Proceedings of {ICML}-99, 16th International Conference on Machine Learning},
  editor    = {Ivan Bratko and Sa{\v{s}}o D{\v{z}}eroski},
  publisher = {Morgan Kaufmann Publishers, San Francisco, US},
  address   = {Bled, SI},
  pages     = {335--343},
  year      = {1999},
  url       = {citeseer.ist.psu.edu/article/rennie99using.html},
}

% Technical report surveying machine learning for automated text categorisation.
% The techreport type requires an institution; without it BibTeX warns about
% an empty field.
% NOTE(review): institution inferred from the IEI report number and the Pisa
% address -- confirm against the report itself.
@techreport{sebastiani99machine,
  author      = {Fabrizio Sebastiani},
  title       = {Machine learning in automated text categorisation: a survey},
  institution = {Istituto di Elaborazione dell'Informazione, Consiglio Nazionale delle Ricerche},
  number      = {IEI-B4-31-1999},
  address     = {Pisa, IT},
  year        = {1999},
  url         = {citeseer.ist.psu.edu/sebastiani99machine.html},
}

% Shark-search algorithm paper from the Seventh WWW Conference (1998).
% Names are joined with "and" with a space after each initial, "algorith" is
% corrected to "algorithm", the dash is written "---", and the booktitle uses
% the same wording as the google-crawler entry for the same conference.
@inproceedings{sharksearch,
  author    = {M. Hersovici and M. Jacovi and Y. S. Maarek and D. Pelleg and M. Shtalhaim and S. Ur},
  title     = {The shark-search algorithm --- An application: Tailored Web site mapping},
  booktitle = {Proceedings of the Seventh International World Wide Web Conference},
  year      = {1998},
}


% ICDE 2002 paper on a high-performance distributed Web crawler.
@inproceedings{shkapenyuk02design,
  author    = {Vladislav Shkapenyuk and Torsten Suel},
  title     = {Design and Implementation of a High-Performance Distributed Web Crawler},
  booktitle = {{ICDE}},
  year      = {2002},
  url       = {citeseer.ist.psu.edu/shkapenyuk02design.html},
}



% Evaluation framework for topical crawlers. The text field holds the
% free-form citation string; it is not a standard field, so standard styles
% do not print it.
@misc{srinivasan02general,
  author = {P. Srinivasan and G. Pant and F. Menczer},
  title  = {A general evaluation framework for topical crawlers},
  text   = {P. Srinivasan, G. Pant, and F. Menczer. A general evaluation framework for topical crawlers. IEEE Trans. on Knowledge and Data Engineering, Submitted, 2002.},
  year   = {2002},
  url    = {citeseer.ist.psu.edu/srinivasan02general.html},
}


% RBSE Spider paper from the First WWW Conference (1994).
% The booktitle no longer starts with "In": styles add that word themselves,
% which produced "In In Proceedings". The acronym RBSE is brace-protected
% against sentence casing and the dash is written "---".
@inproceedings{www-crawling-1,
  author    = {David Eichmann},
  title     = {The {RBSE} Spider --- Balancing Effective Search Against Web Load},
  booktitle = {Proceedings of the First International World Wide Web Conference},
  pages     = {943--947},
  year      = {1994},
}


% GENVL and WWWW paper from the First WWW Conference (1994).
% Both acronyms are brace-protected; sentence-casing styles would otherwise
% lowercase them.
@inproceedings{www-crawling-2,
  author    = {Oliver A. McBryan},
  title     = {{GENVL} and {WWWW}: Tools for Taming the Web},
  booktitle = {Proceedings of the First International World Wide Web Conference},
  pages     = {79--90},
  year      = {1994},
}

% WebCrawler experience paper from the Second WWW Conference (1994).
% The product name WebCrawler is brace-protected; sentence-casing styles would
% otherwise lowercase it.
@inproceedings{www-crawling-3,
  author    = {Brian Pinkerton},
  title     = {Finding What People Want: Experiences with the {WebCrawler}},
  booktitle = {Proceedings of the Second International World Wide Web Conference},
  year      = {1994},
}

% Journal article comparing approaches to hypertext categorization.
% The issue and page ranges use "--" so they are typeset as en-dashes.
@article{yang02study,
  author  = {Yiming Yang and Sean Slattery and Rayid Ghani},
  title   = {A Study of Approaches to Hypertext Categorization},
  journal = {Journal of Intelligent Information Systems},
  volume  = {18},
  number  = {2--3},
  pages   = {219--241},
  year    = {2002},
  url     = {citeseer.ist.psu.edu/478602.html},
}


% VLDB 2001 paper on crawling the hidden Web.
@inproceedings{raghavan01crawling,
  author    = {Sriram Raghavan and Hector Garcia-Molina},
  title     = {Crawling the Hidden Web},
  booktitle = {Proceedings of the Twenty-seventh International Conference on Very Large Databases},
  year      = {2001},
  url       = {citeseer.ist.psu.edu/article/raghavan01crawling.html},
}



% Information Retrieval article on building Internet portals with machine learning.
@article{mccallum00automating,
  author    = {Andrew K. McCallum and Kamal Nigam and Jason Rennie and Kristie Seymore},
  title     = {Automating the Construction of Internet Portals with Machine Learning},
  journal   = {Information Retrieval},
  volume    = {3},
  number    = {2},
  pages     = {127--163},
  publisher = {Kluwer Academic Publishers},
  year      = {2000},
  url       = {citeseer.ist.psu.edu/article/mccallum99automating.html},
}

% WWW10 paper showing that breadth-first crawling yields high-quality pages.
% An entry may carry only one url field: the duplicate is reported and
% dropped by BibTeX. The older nj.nec.com mirror is kept under the
% non-standard legacyurl field, which standard styles ignore.
% The month uses the predefined may macro.
@inproceedings{mn+jw:bfs_hiqh_quality,
    author    = {Marc Najork and Janet L. Wiener},
    title     = {{B}readth-{F}irst {C}rawling {Y}ields {H}igh-{Q}uality {P}ages},
    booktitle = {Proceedings of the 10th International World Wide Web Conference},
    publisher = {Elsevier Science},
    address   = {Hong Kong},
    pages     = {114--118},
    month     = may,
    year      = {2001},
    url       = {citeseer.ist.psu.edu/najork01breadthfirst.html},
    legacyurl = {citeseer.nj.nec.com/najork01breadthfirst.html},
}